'''
* This is the project for the Brtc LlmOps Platform
* @Author Leon-liao <liaosiliang@alltman.com>
* @Description //TODO 
* @File: 1_study_blob_example.py
* @Time: 2025/8/27
* @All Rights Reserved By Brtc
'''
from typing import Iterator

from langchain_core.document_loaders import  BaseBlobParser
from langchain_core.documents import Document
from langchain_core.documents.base import Blob


class CustomParser(BaseBlobParser):
    """Custom blob parser that emits one ``Document`` per line of a blob.

    Each document's ``page_content`` is a single line of the blob (the line
    terminator is kept), and its ``metadata`` records the blob's ``source``
    together with the zero-based ``line_number`` of that line.
    """

    def lazy_parse(self, blob: Blob) -> Iterator[Document]:
        """Lazily split ``blob`` into per-line documents.

        Args:
            blob: The blob to read. Its raw bytes are decoded using
                ``blob.encoding``.

        Yields:
            One ``Document`` per line, in file order, with metadata keys
            ``"source"`` and ``"line_number"`` (starting at 0).

        Raises:
            UnicodeDecodeError: If a line cannot be decoded with
                ``blob.encoding``.
        """
        with blob.as_bytes_io() as f:
            # Iterating the byte stream splits on b"\n" *before* decoding, so
            # this assumes an ASCII-compatible encoding (e.g. UTF-8, GBK).
            for line_number, raw_line in enumerate(f):
                yield Document(
                    # The stream yields bytes; decode explicitly with the
                    # blob's declared encoding rather than relying on the
                    # Document model to coerce bytes into text.
                    page_content=raw_line.decode(blob.encoding),
                    metadata={"source": blob.source, "line_number": line_number},
                )

# Demo driver: load a text file as a Blob, parse it line by line, and print
# every resulting document followed by the total document count.
parser = CustomParser()

# Step 1: load the data.
# Alternative for experimenting: build the blob from in-memory data instead,
# e.g. blob = Blob(data="hello \r\n world")
blob = Blob.from_path("./eshop_goods.txt")

# Step 2: parse the data, materializing the lazy iterator into a list.
docs = [*parser.lazy_parse(blob)]

for onedoc in docs:
    print(onedoc)

print(len(docs))